## CLEANING GB ##
setwd("set working directory")
library(haven)

# Read the five panel waves into UK_W1 ... UK_W5.
# Waves 1 and 5 are read with read_sav(), waves 2-4 with read_dta().
UK_W1 <- haven::read_sav("load panel wave 1")
UK_W2 <- haven::read_dta("load panel wave 2")
UK_W3 <- haven::read_dta("load panel wave 3")
UK_W4 <- haven::read_dta("load panel wave 4")
UK_W5 <- haven::read_sav("load panel wave 5")

library(dplyr)

# Waves 1-4: keep the contiguous run of columns that starts at GUNQID and
# ends at that wave's cos1_wN_4 item; everything after it is dropped.
UK_W1 <- select(UK_W1, GUNQID:cos1_w1_4)
UK_W2 <- select(UK_W2, GUNQID:cos1_w2_4)
UK_W3 <- select(UK_W3, GUNQID:cos1_w3_4)
UK_W4 <- select(UK_W4, GUNQID:cos1_w4_4)

library(janitor)

# Compare column names and classes across waves 1-4 with the wave marker
# ("_w1" ... "_w4") removed, so that the same item lines up in one row of
# data.comp.
#
# The marker is stripped on copies inside local() only. Every later section
# of this script addresses columns by their suffixed names (purpose_w1,
# track8_w3_1, ...), so renaming UK_W1 ... UK_W4 in place made those steps
# fail with "object not found" when the script is run top to bottom.
data.comp <- local({
  # Remove the first literal "_w<wave>" from every column name except the
  # first column. fixed = TRUE matters: the previous pattern "_w1*" is a
  # regular expression meaning "_w" followed by zero or more 1s, so it also
  # cut "_w" out of any name that merely contained it.
  strip_wave <- function(df, wave) {
    marker <- paste0("_w", wave)
    rename_with(df, function(nm) sub(marker, "", nm, fixed = TRUE), -1)
  }

  # These assignments create local copies; the global data frames of the
  # same name are left untouched. Reusing the names keeps the column labels
  # of the comparison table identical to a direct call.
  UK_W1 <- strip_wave(UK_W1, 1)
  UK_W2 <- strip_wave(UK_W2, 2)
  UK_W3 <- strip_wave(UK_W3, 3)
  UK_W4 <- strip_wave(UK_W4, 4)

  compare_df_cols(UK_W1, UK_W2, UK_W3, UK_W4)
})

# Wave 4 stores its demographics under `profile_*` names. List the columns,
# then rename them to the shorter names used for the checks below.
names(UK_W4)
UK_W4 <- UK_W4 %>%
  rename(
    education = profile_education_level,
    gender = profile_gender,
    region_GOR = profile_GOR,
    income = profile_gross_household,
    religion = profile_religion,
    socialgrade_cie = profile_socialgrade_cie
  )

## checking education

table(UK_W1$education_level) # missing variable.

table(UK_W2$education_level) # 20 values

table(UK_W3$education_level) # 20 values

# The rename above turned profile_education_level into `education`; the old
# name no longer exists in UK_W4 and would tabulate as empty.
table(UK_W4$education) # 20 values


## checking gender ##
# Tabulation only; nothing is renamed or recoded in this section.

table(UK_W1$gender) # two values

table(UK_W2$gender) # two values

table(UK_W3$gender) # two values

# UK_W4's profile_gender was renamed to `gender` earlier in the script, so
# the old name would return an empty table here.
table(UK_W4$gender) # two values



## checking region_GOR
# Tabulation only; nothing is renamed or recoded in this section.

table(UK_W1$region_GOR) # 11 values

table(UK_W2$region_GOR) # 11 values

table(UK_W3$region_GOR) # 11 values

# UK_W4's profile_GOR was renamed to `region_GOR` earlier in the script, so
# the old name would return an empty table here.
table(UK_W4$region_GOR) # 11 values


## checking gross household income
# The wave 2-4 lines here used to repeat the region_GOR tabulations, so
# household income was never actually inspected for those waves.

table(UK_W1$gross_household) # missing

# NOTE(review): waves 2 and 3 are assumed to use the same column name as the
# wave 1 check above -- confirm against names(UK_W2) / names(UK_W3).
table(UK_W2$gross_household)

table(UK_W3$gross_household)

# UK_W4's profile_gross_household was renamed to `income` earlier in the
# script.
table(UK_W4$income)



## checking religion
# Tabulation only; nothing is renamed or recoded in this section.

names(UK_W1)

table(UK_W1$religion) # missing

table(UK_W2$religion) # 19 values

table(UK_W3$religion) # 19 values

# UK_W4's profile_religion was renamed to `religion` earlier in the script,
# so the old name would return an empty table here.
table(UK_W4$religion) # 19 values



## checking and renaming social grade

table(UK_W1$new_socgrade) # 2 values

table(UK_W2$socialgrade_cie) # 6 values

table(UK_W3$profile_socialgrade_cie) # 6 values

# UK_W4's profile_socialgrade_cie was already renamed to `socialgrade_cie`
# by the wave 4 rename earlier in the script, so it is read under its new
# name here.
table(UK_W4$socialgrade_cie) # 6 values

# Only wave 3 still carries the `profile_` prefix at this point. Repeating
# the rename for UK_W4 would stop the script, because rename() errors when
# the source column does not exist.
UK_W3 <- UK_W3 %>%
  rename(socialgrade_cie = profile_socialgrade_cie)


## checking voted2017 variable
# Tabulated for wave 1 only in this section.


table(UK_W1$voted2017) # NOTE(review): original note was truncated ("only available from ..."); presumably wave 1 only -- confirm

## checking voted_ge_2019
# Tabulated for waves 2-4; the trailing notes record what was observed.

table(UK_W2$voted_ge_2019)  # 3 values coded

table(UK_W3$voted_ge_2019) # 3 values

table(UK_W4$voted_ge_2019) # 3 values, but values are coded as 1, 2, and 99 rather than 1, 2, and 3



### checking pastvote_ge_2019 ##
# Inspection only: no recode of pastvote_ge_2019 is applied in this section.

table(UK_W2$pastvote_ge_2019) # 1 to 9

table(UK_W3$pastvote_ge_2019) # 1 to 9

table(UK_W4$pastvote_ge_2019) # 1 to 7, then "98, and 98" (sic) -- NOTE(review): second code is probably 99; confirm



## CHECKING DK
# Tabulate the wave 1 track8 / track9 / track10 items.
# NOTE(review): "DK" presumably stands for "Don't know" -- confirm.
table(UK_W1$track8_w1_1)
table(UK_W1$track9_w1)
table(UK_W1$track10_w1)

# WAVE 1 - UK ----

# Drop three columns that were flagged as duplicates.
UK_W1 <- select(
  UK_W1,
  -c(gross_household_income, profile_education_level, profile_religion)
)

# Look at the raw distributions first; none of these calls changes UK_W1.
table(UK_W1$track1_w1_1) # 997 = Don't know
table(UK_W1$purpose_w1)
table(UK_W1$cost5_w1)
table(UK_W1$trust1_w1)
table(UK_W1$trust2_w1)
table(UK_W1$trust3_w1)
table(UK_W1$dem1_w1)
UK_W1$dnut_w1
table(UK_W1$dnut_w1)

# Apply every wave 1 recode in a single pass:
#   * purpose, cost5 and dnut are shifted down by one;
#   * on the trust items and dem1, code 12 is replaced by 11 and every
#     other value is left as it is.
UK_W1 <- UK_W1 %>%
  mutate(
    across(c(purpose_w1, cost5_w1, dnut_w1), ~ .x - 1),
    across(
      c(trust1_w1, trust2_w1, trust3_w1, dem1_w1),
      ~ dplyr::recode(.x, '12' = 11)
    )
  )

# Items that were inspected but not recoded.
table(UK_W1$eff1_w1) # all good
table(UK_W1$leftrt_w1)

write_sav(UK_W1, "rename panel")

# WAVE 2 - GB ----

# Look at the raw distributions first; none of these calls changes UK_W2.
# (track8_w2_3 is not tabulated in this section.)
table(UK_W2$track8_w2_1)
table(UK_W2$track8_w2_2)
table(UK_W2$track8_w2_4)
table(UK_W2$track8_w2_5)
table(UK_W2$track9_w2)
table(UK_W2$track10_w2)
table(UK_W2$purpose_w2)
table(UK_W2$cost5_w2)
table(UK_W2$trust1_w2)
table(UK_W2$trust2_w2)
table(UK_W2$trust3_w2)
table(UK_W2$dem1_w2)

# Apply every wave 2 recode in a single pass:
#   * purpose, cost5 and dem1 are shifted down by one;
#   * on the trust items, code 12 is replaced by 11 and every other value
#     is left as it is.
UK_W2 <- UK_W2 %>%
  mutate(
    across(c(purpose_w2, cost5_w2, dem1_w2), ~ .x - 1),
    across(
      c(trust1_w2, trust2_w2, trust3_w2),
      ~ dplyr::recode(.x, '12' = 11)
    )
  )

# don3 is inspected only.
table(UK_W2$don3_w2)



# WAVE 3 - GB ----

# Raw distributions of the five track8 items.
table(UK_W3$track8_w3_1)
table(UK_W3$track8_w3_2)
table(UK_W3$track8_w3_3)
table(UK_W3$track8_w3_4)
table(UK_W3$track8_w3_5)

# Shift all five track8 items down by one.
UK_W3 <- UK_W3 %>%
  mutate(
    across(
      c(track8_w3_1, track8_w3_2, track8_w3_3, track8_w3_4, track8_w3_5),
      ~ .x - 1
    )
  )

# Re-check items 2-5 after the shift.
table(UK_W3$track8_w3_2)
table(UK_W3$track8_w3_3)
table(UK_W3$track8_w3_4)
table(UK_W3$track8_w3_5)

# Raw distributions of the remaining items, taken before they are shifted.
table(UK_W3$track9_w3)
table(UK_W3$track10_w3)
table(UK_W3$purpose_w3)
table(UK_W3$cost5_w3)
table(UK_W3$trust1_w3)
table(UK_W3$trust2_w3)
table(UK_W3$trust3_w3)
table(UK_W3$dem1_w3)
UK_W3$dnut_w3

# Shift every remaining item down by one in a single pass.
# NOTE(review): the trust items are shifted here, whereas the other wave
# sections of this script replace code 12 with 11 -- confirm this is
# intended for wave 3.
UK_W3 <- UK_W3 %>%
  mutate(
    across(
      c(
        track9_w3, track10_w3, purpose_w3, cost5_w3,
        trust1_w3, trust2_w3, trust3_w3, dem1_w3, dnut_w3
      ),
      ~ .x - 1
    )
  )

UK_W3$DELseg

write_sav(UK_W3, "rename panel")



# WAVE 4 - GB ----

# Look at the raw distributions first; none of these calls changes UK_W4.
table(UK_W4$track8_w4_1)
table(UK_W4$track8_w4_2)
table(UK_W4$track8_w4_3)
table(UK_W4$track8_w4_4)
table(UK_W4$track8_w4_5)
table(UK_W4$track9_w4)
table(UK_W4$track10_w4)
table(UK_W4$purpose_w4)
table(UK_W4$cost5_w4)
table(UK_W4$trust1_w4)
table(UK_W4$trust2_w4)
table(UK_W4$trust3_w4)
table(UK_W4$dem1_w4)
table(UK_W4$dnut_w4)

# Apply every wave 4 recode in a single pass:
#   * purpose, cost5, dem1 and dnut are shifted down by one;
#   * on the trust items, code 12 is replaced by 11 and every other value
#     is left as it is.
# NOTE(review): this section does not write UK_W4 to disk.
UK_W4 <- UK_W4 %>%
  mutate(
    across(c(purpose_w4, cost5_w4, dem1_w4, dnut_w4), ~ .x - 1),
    across(
      c(trust1_w4, trust2_w4, trust3_w4),
      ~ dplyr::recode(.x, '12' = 11)
    )
  )


# WAVE 5 - GB ----
library(haven)
library(tidyverse)

# Wave 5 stores its demographics under `profile_*` names; rename them to the
# same short names that are given to wave 4 earlier in the script.
UK_W5 <- UK_W5 %>%
  rename(
    education = profile_education_level,
    gender = profile_gender,
    region_GOR = profile_GOR,
    income = profile_gross_household,
    religion = profile_religion,
    socialgrade_cie = profile_socialgrade_cie
  )

# Look at the raw distributions first; none of these calls changes UK_W5.
names(UK_W5)
table(UK_W5$track8_w5_1)
table(UK_W5$track8_w5_2)
table(UK_W5$track8_w5_3)
table(UK_W5$track8_w5_4)
table(UK_W5$track8_w5_5)
table(UK_W5$track9_w5)
table(UK_W5$track10_w5)
table(UK_W5$purpose_w5)
table(UK_W5$cost5_w5)
table(UK_W5$trust1_w5)
table(UK_W5$trust2_w5)
table(UK_W5$trust3_w5)
table(UK_W5$dem1_w5)
table(UK_W5$dnut_w5) # not included

# Apply the wave 5 recodes in a single pass:
#   * purpose, cost5 and dem1 are shifted down by one;
#   * on the trust items, code 12 is replaced by 11 and every other value
#     is left as it is.
UK_W5 <- UK_W5 %>%
  mutate(
    across(c(purpose_w5, cost5_w5, dem1_w5), ~ .x - 1),
    across(
      c(trust1_w5, trust2_w5, trust3_w5),
      ~ dplyr::recode(.x, '12' = 11)
    )
  )

# dnut: this step used to shift `dnut_w4`, a wave 4 column name copied into
# the wave 5 section, which stops the script because UK_W5 has no such
# column. Shift the wave 5 item instead, and only when it is present (the
# tabulation above was annotated "not included").
if ("dnut_w5" %in% names(UK_W5)) {
  UK_W5 <- UK_W5 %>%
    mutate(dnut_w5 = dnut_w5 - 1)
}

write_sav(UK_W5, "rename panel")